{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "53a99672",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   种类   产地   水果  数量  价格\n",
      "0  水果   朝鲜   橘子   3   2\n",
      "1  水果   中国   苹果   5   5\n",
      "2  水果   缅甸  哈密瓜   5  12\n",
      "3  蔬菜   中国   番茄   3   3\n",
      "4  蔬菜  菲律宾   椰子   2   4\n",
      "5  肉类   韩国   鱼肉  15  18\n",
      "6  肉类   中国   牛肉   9  20\n",
      "          数量         价格\n",
      "0   4.333333   6.333333\n",
      "1   4.333333   6.333333\n",
      "2   4.333333   6.333333\n",
      "3   2.500000   3.500000\n",
      "4   2.500000   3.500000\n",
      "5  12.000000  19.000000\n",
      "6  12.000000  19.000000\n",
      "         数量        价格\n",
      "0 -1.333333 -4.333333\n",
      "1  0.666667 -1.333333\n",
      "2  0.666667  5.666667\n",
      "3  0.500000 -0.500000\n",
      "4 -0.500000  0.500000\n",
      "5  3.000000 -1.000000\n",
      "6 -3.000000  1.000000\n",
      "      种类  产地  水果  数量  价格\n",
      "种类                      \n",
      "水果 0  水果  朝鲜  橘子   3   2\n",
      "肉类 5  肉类  韩国  鱼肉  15  18\n",
      "蔬菜 3  蔬菜  中国  番茄   3   3\n",
      "{'水果': [0, 1, 2], '肉类': [5, 6], '蔬菜': [3, 4]}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_65100\\101567315.py:11: FutureWarning: Dropping invalid columns in DataFrameGroupBy.transform is deprecated. In a future version, a TypeError will be raised. Before calling .transform, select only columns which should be valid for the function.\n",
      "  print(df.groupby('种类').transform(np.mean))\n",
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_65100\\101567315.py:14: FutureWarning: Dropping invalid columns in DataFrameGroupBy.transform is deprecated. In a future version, a TypeError will be raised. Before calling .transform, select only columns which should be valid for the function.\n",
      "  print(df.groupby('种类').transform(demean))\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.DataFrame({'种类':['水果','水果','水果','蔬菜','蔬菜','肉类','肉类'],\n",
    "                '产地':['朝鲜','中国','缅甸','中国','菲律宾','韩国','中国'],\n",
    "                '水果':['橘子','苹果','哈密瓜','番茄','椰子','鱼肉','牛肉'],\n",
    "                '数量':[3,5,5,3,2,15,9],\n",
    "                '价格':[2,5,12,3,4,18,20]})\n",
    "print(df)\n",
    "#分组求均值，水果、蔬菜、肉类\n",
    "#对可执行计算的数值列求均值\n",
    "print(df.groupby('种类').transform(np.mean))\n",
    "#transform()直接应用demean，实现去均值操作\n",
    "demean = lambda arr:arr-arr.mean()\n",
    "print(df.groupby('种类').transform(demean))\n",
    "#自定义函数\n",
    "# 返回分组的前n行数据\n",
    "def get_rows(df,n): \n",
    "     #从1到n行的所有列\n",
    "    return df.iloc[:n,:]\n",
    "#分组后的组名作为行索引\n",
    "print(df.groupby('种类').apply(get_rows,n=1))\n",
    "print(df.groupby('种类').groups)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "504bf2a8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "种类\n",
      "水果    3\n",
      "肉类    2\n",
      "蔬菜    2\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(df.groupby('种类').size())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "81b87089",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\Administrator\\\\seaborn-data'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "sns.utils.get_data_home()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "be8a6b16",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_tips = sns.load_dataset('tips')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "20966d31",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_bill   tip     sex smoker  day    time  size\n",
       "0       16.99  1.01  Female     No  Sun  Dinner     2\n",
       "1       10.34  1.66    Male     No  Sun  Dinner     3\n",
       "2       21.01  3.50    Male     No  Sun  Dinner     3\n",
       "3       23.68  3.31    Male     No  Sun  Dinner     2\n",
       "4       24.59  3.61  Female     No  Sun  Dinner     4"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tips.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "869781d5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "group;      total_bill   tip   sex smoker  day    time  size    day_avg\n",
      "1         10.34  1.66  Male     No  Sun  Dinner     3  21.410000\n",
      "2         21.01  3.50  Male     No  Sun  Dinner     3  21.410000\n",
      "3         23.68  3.31  Male     No  Sun  Dinner     2  21.410000\n",
      "5         25.29  4.71  Male     No  Sun  Dinner     4  21.410000\n",
      "6          8.77  2.00  Male     No  Sun  Dinner     2  21.410000\n",
      "..          ...   ...   ...    ...  ...     ...   ...        ...\n",
      "236       12.60  1.00  Male    Yes  Sat  Dinner     2  20.441379\n",
      "237       32.83  1.17  Male    Yes  Sat  Dinner     2  20.441379\n",
      "239       29.03  5.92  Male     No  Sat  Dinner     3  20.441379\n",
      "241       22.67  2.00  Male    Yes  Sat  Dinner     2  20.441379\n",
      "242       17.82  1.75  Male     No  Sat  Dinner     2  20.441379\n",
      "\n",
      "[157 rows x 8 columns]\n",
      "label: Male\n",
      "group;      total_bill   tip     sex smoker   day    time  size    day_avg\n",
      "0         16.99  1.01  Female     No   Sun  Dinner     2  21.410000\n",
      "4         24.59  3.61  Female     No   Sun  Dinner     4  21.410000\n",
      "11        35.26  5.00  Female     No   Sun  Dinner     4  21.410000\n",
      "14        14.83  3.02  Female     No   Sun  Dinner     2  21.410000\n",
      "16        10.33  1.67  Female     No   Sun  Dinner     3  21.410000\n",
      "..          ...   ...     ...    ...   ...     ...   ...        ...\n",
      "226       10.09  2.00  Female    Yes   Fri   Lunch     2  17.151579\n",
      "229       22.12  2.88  Female    Yes   Sat  Dinner     2  20.441379\n",
      "238       35.83  4.67  Female     No   Sat  Dinner     3  20.441379\n",
      "240       27.18  2.00  Female    Yes   Sat  Dinner     2  20.441379\n",
      "243       18.78  3.00  Female     No  Thur  Dinner     2  17.682742\n",
      "\n",
      "[87 rows x 8 columns]\n",
      "label: Female\n"
     ]
    }
   ],
   "source": [
    "groups = df_tips.groupby('sex')\n",
    "for label, group in groups:\n",
    "    print('group;', group)\n",
    "    print('label:', label)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "94f7d48e",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>25.29</td>\n",
       "      <td>4.71</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>8.77</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_bill   tip   sex smoker  day    time  size\n",
       "1       10.34  1.66  Male     No  Sun  Dinner     3\n",
       "2       21.01  3.50  Male     No  Sun  Dinner     3\n",
       "3       23.68  3.31  Male     No  Sun  Dinner     2\n",
       "5       25.29  4.71  Male     No  Sun  Dinner     4\n",
       "6        8.77  2.00  Male     No  Sun  Dinner     2"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "groups.get_group('Male').head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "760319f5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sex\n",
      "Male      20.744076\n",
      "Female    18.056897\n",
      "Name: total_bill, dtype: float64\n",
      "sex\n",
      "Male      20.744076\n",
      "Female    18.056897\n",
      "Name: total_bill, dtype: float64 sex\n",
      "Male      157\n",
      "Female     87\n",
      "dtype: int64 sex\n",
      "Male      3256.82\n",
      "Female    1570.95\n",
      "Name: total_bill, dtype: float64 sex\n",
      "Male      7.25\n",
      "Female    3.07\n",
      "Name: total_bill, dtype: float64 sex\n",
      "Male      50.81\n",
      "Female    44.30\n",
      "Name: total_bill, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "mean = df_tips.groupby('sex')['total_bill'].mean()\n",
    "agg_mean = df_tips.groupby('sex')['total_bill'].agg('mean')\n",
    "size = df_tips.groupby('sex').size()\n",
    "d_sum = df_tips.groupby('sex')['total_bill'].sum()\n",
    "d_min = df_tips.groupby('sex')['total_bill'].min()\n",
    "d_max = df_tips.groupby('sex')['total_bill'].max()\n",
    "print(agg_mean)\n",
    "print(mean, size, d_sum, d_min, d_max)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "45c43c94",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>max</th>\n",
       "      <th>min</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Male</th>\n",
       "      <td>20.744076</td>\n",
       "      <td>50.81</td>\n",
       "      <td>7.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Female</th>\n",
       "      <td>18.056897</td>\n",
       "      <td>44.30</td>\n",
       "      <td>3.07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             mean    max   min\n",
       "sex                           \n",
       "Male    20.744076  50.81  7.25\n",
       "Female  18.056897  44.30  3.07"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tips.groupby('sex')['total_bill'].agg(['mean', 'max', 'min'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "972618a0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tip_mean</th>\n",
       "      <th>tip_max</th>\n",
       "      <th>min</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Male</th>\n",
       "      <td>20.744076</td>\n",
       "      <td>50.81</td>\n",
       "      <td>7.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Female</th>\n",
       "      <td>18.056897</td>\n",
       "      <td>44.30</td>\n",
       "      <td>3.07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         tip_mean  tip_max   min\n",
       "sex                             \n",
       "Male    20.744076    50.81  7.25\n",
       "Female  18.056897    44.30  3.07"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 重命名列名\n",
    "df_tips.groupby('sex')['total_bill'].agg([('tip_mean','mean'),('tip_max', 'max'),'min'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "7f8b4d66",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>tip_mean</th>\n",
       "      <th>tip_max</th>\n",
       "      <th>min</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Male</td>\n",
       "      <td>20.744076</td>\n",
       "      <td>50.81</td>\n",
       "      <td>7.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Female</td>\n",
       "      <td>18.056897</td>\n",
       "      <td>44.30</td>\n",
       "      <td>3.07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      sex   tip_mean  tip_max   min\n",
       "0    Male  20.744076    50.81  7.25\n",
       "1  Female  18.056897    44.30  3.07"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 去掉index\n",
    "df_tips.groupby('sex')['total_bill'].agg([('tip_mean','mean'),('tip_max', 'max'),'min']).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "ab6499eb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sex     day \n",
       "Male    Thur    30\n",
       "        Fri     10\n",
       "        Sat     59\n",
       "        Sun     58\n",
       "Female  Thur    32\n",
       "        Fri      9\n",
       "        Sat     28\n",
       "        Sun     18\n",
       "dtype: int64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 同时对sex和day进行分组\n",
    "df_tips.groupby(['sex','day']).size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "98bdcc4c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>tip_mean</th>\n",
       "      <th>tip_max</th>\n",
       "      <th>tip_min</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sex</th>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">Male</th>\n",
       "      <th>Thur</th>\n",
       "      <td>18.714667</td>\n",
       "      <td>41.19</td>\n",
       "      <td>7.51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Fri</th>\n",
       "      <td>19.857000</td>\n",
       "      <td>40.17</td>\n",
       "      <td>8.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <td>20.802542</td>\n",
       "      <td>50.81</td>\n",
       "      <td>7.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <td>21.887241</td>\n",
       "      <td>48.17</td>\n",
       "      <td>7.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">Female</th>\n",
       "      <th>Thur</th>\n",
       "      <td>16.715312</td>\n",
       "      <td>43.11</td>\n",
       "      <td>8.35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Fri</th>\n",
       "      <td>14.145556</td>\n",
       "      <td>22.75</td>\n",
       "      <td>5.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <td>19.680357</td>\n",
       "      <td>44.30</td>\n",
       "      <td>3.07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <td>19.872222</td>\n",
       "      <td>35.26</td>\n",
       "      <td>9.60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              tip_mean  tip_max  tip_min\n",
       "sex    day                              \n",
       "Male   Thur  18.714667    41.19     7.51\n",
       "       Fri   19.857000    40.17     8.58\n",
       "       Sat   20.802542    50.81     7.74\n",
       "       Sun   21.887241    48.17     7.25\n",
       "Female Thur  16.715312    43.11     8.35\n",
       "       Fri   14.145556    22.75     5.75\n",
       "       Sat   19.680357    44.30     3.07\n",
       "       Sun   19.872222    35.26     9.60"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tips.groupby(['sex','day'])['total_bill'].agg([('tip_mean','mean'),('tip_max','max'),('tip_min','min')])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "007fb2fe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>day</th>\n",
       "      <th>tip_mean</th>\n",
       "      <th>tip_max</th>\n",
       "      <th>tip_min</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Male</td>\n",
       "      <td>Thur</td>\n",
       "      <td>18.714667</td>\n",
       "      <td>41.19</td>\n",
       "      <td>7.51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Male</td>\n",
       "      <td>Fri</td>\n",
       "      <td>19.857000</td>\n",
       "      <td>40.17</td>\n",
       "      <td>8.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Male</td>\n",
       "      <td>Sat</td>\n",
       "      <td>20.802542</td>\n",
       "      <td>50.81</td>\n",
       "      <td>7.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Male</td>\n",
       "      <td>Sun</td>\n",
       "      <td>21.887241</td>\n",
       "      <td>48.17</td>\n",
       "      <td>7.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Female</td>\n",
       "      <td>Thur</td>\n",
       "      <td>16.715312</td>\n",
       "      <td>43.11</td>\n",
       "      <td>8.35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Female</td>\n",
       "      <td>Fri</td>\n",
       "      <td>14.145556</td>\n",
       "      <td>22.75</td>\n",
       "      <td>5.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Female</td>\n",
       "      <td>Sat</td>\n",
       "      <td>19.680357</td>\n",
       "      <td>44.30</td>\n",
       "      <td>3.07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Female</td>\n",
       "      <td>Sun</td>\n",
       "      <td>19.872222</td>\n",
       "      <td>35.26</td>\n",
       "      <td>9.60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      sex   day   tip_mean  tip_max  tip_min\n",
       "0    Male  Thur  18.714667    41.19     7.51\n",
       "1    Male   Fri  19.857000    40.17     8.58\n",
       "2    Male   Sat  20.802542    50.81     7.74\n",
       "3    Male   Sun  21.887241    48.17     7.25\n",
       "4  Female  Thur  16.715312    43.11     8.35\n",
       "5  Female   Fri  14.145556    22.75     5.75\n",
       "6  Female   Sat  19.680357    44.30     3.07\n",
       "7  Female   Sun  19.872222    35.26     9.60"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tips.groupby(['sex','day'])['total_bill'].agg([('tip_mean','mean'),('tip_max','max'),('tip_min','min')]).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "5fea673d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             tip         total_bill         \n",
      "         avg_tip max_tip   avg_bill max_bill\n",
      "sex                                         \n",
      "Male    3.089618    10.0  20.744076    50.81\n",
      "Female  2.833448     6.5  18.056897    44.30\n"
     ]
    }
   ],
   "source": [
    "# 针对不同的列做不同的聚合运算\n",
    "grouped=df_tips.groupby('sex').agg({'tip':[('avg_tip','mean'),\\\n",
    "                                  ('max_tip','max')],'total_bill':[('avg_bill','mean'),('max_bill','max')]})\n",
    "print(grouped)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "d89842b6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex([(       'tip',  'avg_tip'),\n",
       "            (       'tip',  'max_tip'),\n",
       "            ('total_bill', 'avg_bill'),\n",
       "            ('total_bill', 'max_bill')],\n",
       "           )"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "7e22eac8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>avg_tip</th>\n",
       "      <th>max_tip</th>\n",
       "      <th>avg_bill</th>\n",
       "      <th>max_bill</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Male</td>\n",
       "      <td>3.089618</td>\n",
       "      <td>10.0</td>\n",
       "      <td>20.744076</td>\n",
       "      <td>50.81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Female</td>\n",
       "      <td>2.833448</td>\n",
       "      <td>6.5</td>\n",
       "      <td>18.056897</td>\n",
       "      <td>44.30</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      sex   avg_tip  max_tip   avg_bill  max_bill\n",
       "0    Male  3.089618     10.0  20.744076     50.81\n",
       "1  Female  2.833448      6.5  18.056897     44.30"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 修改列名\n",
    "grouped.columns=['avg_tip','max_tip','avg_bill','max_bill']\n",
    "grouped.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "a862c2ad",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Male</th>\n",
       "      <td>43.56</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Female</th>\n",
       "      <td>41.23</td>\n",
       "      <td>6.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        total_bill   tip\n",
       "sex                     \n",
       "Male         43.56  10.0\n",
       "Female       41.23   6.5"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 自定义聚合函数求值\n",
    "df_tips.groupby('sex').agg({'total_bill': lambda bill:bill.max()-bill.min(),'tip':lambda tip: tip.max()})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "67022c32",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sex\n",
       "Male      20.744076\n",
       "Female    18.056897\n",
       "Name: total_bill, dtype: float64"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def max_deviation(s):\n",
    "    return s.mean()\n",
    "df_tips.groupby('sex')['total_bill'].agg(max_deviation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "e71916e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sex\n",
       "Male      0.159236\n",
       "Female    0.080460\n",
       "Name: total_bill, dtype: float64"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def bill_between(s, low, high):\n",
    "    return s.between(low,high).mean()\n",
    "df_tips.groupby('sex')['total_bill'].agg(bill_between, 30, 60)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "330e6962",
   "metadata": {},
   "source": [
    "# 数据过滤和变换"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "59d7f69a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17.682741935483865\n",
      "17.151578947368417\n",
      "20.441379310344825\n",
      "<class 'pandas.core.series.Series'> 19    Sat\n",
      "Name: day, dtype: category\n",
      "Categories (4, object): ['Thur', 'Fri', 'Sat', 'Sun']\n",
      "21.410000000000004\n",
      "<class 'pandas.core.series.Series'> 0    Sun\n",
      "Name: day, dtype: category\n",
      "Categories (4, object): ['Thur', 'Fri', 'Sat', 'Sun']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>238</th>\n",
       "      <td>35.83</td>\n",
       "      <td>4.67</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239</th>\n",
       "      <td>29.03</td>\n",
       "      <td>5.92</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>27.18</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>241</th>\n",
       "      <td>22.67</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>242</th>\n",
       "      <td>17.82</td>\n",
       "      <td>1.75</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>163 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill   tip     sex smoker  day    time  size\n",
       "0         16.99  1.01  Female     No  Sun  Dinner     2\n",
       "1         10.34  1.66    Male     No  Sun  Dinner     3\n",
       "2         21.01  3.50    Male     No  Sun  Dinner     3\n",
       "3         23.68  3.31    Male     No  Sun  Dinner     2\n",
       "4         24.59  3.61  Female     No  Sun  Dinner     4\n",
       "..          ...   ...     ...    ...  ...     ...   ...\n",
       "238       35.83  4.67  Female     No  Sat  Dinner     3\n",
       "239       29.03  5.92    Male     No  Sat  Dinner     3\n",
       "240       27.18  2.00  Female    Yes  Sat  Dinner     2\n",
       "241       22.67  2.00    Male    Yes  Sat  Dinner     2\n",
       "242       17.82  1.75    Male     No  Sat  Dinner     2\n",
       "\n",
       "[163 rows x 7 columns]"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def cu_lambda(s):\n",
    "    print(s['total_bill'].mean())\n",
    "    day_list=[]\n",
    "    if(s['total_bill'].mean() > 20):\n",
    "        print(type(s['day']), (s['day'])[0:1])\n",
    "    return s['total_bill'].mean() > 20\n",
    "df_tips.groupby('day').filter(cu_lambda)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "394548be",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>day_avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>21.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>21.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>21.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>21.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>21.41</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_bill   tip     sex smoker  day    time  size  day_avg\n",
       "0       16.99  1.01  Female     No  Sun  Dinner     2    21.41\n",
       "1       10.34  1.66    Male     No  Sun  Dinner     3    21.41\n",
       "2       21.01  3.50    Male     No  Sun  Dinner     3    21.41\n",
       "3       23.68  3.31    Male     No  Sun  Dinner     2    21.41\n",
       "4       24.59  3.61  Female     No  Sun  Dinner     4    21.41"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tips['day_avg'] = df_tips.groupby('day')['total_bill'].transform(lambda x: x.mean())\n",
    "df_tips.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "45ce0393",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>size</th>\n",
       "      <th>day_avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>244.000000</td>\n",
       "      <td>244.000000</td>\n",
       "      <td>244.000000</td>\n",
       "      <td>244.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>19.785943</td>\n",
       "      <td>2.998279</td>\n",
       "      <td>2.569672</td>\n",
       "      <td>19.785943</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>8.902412</td>\n",
       "      <td>1.383638</td>\n",
       "      <td>0.951100</td>\n",
       "      <td>1.627871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>3.070000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>17.151579</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>13.347500</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>17.682742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>17.795000</td>\n",
       "      <td>2.900000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>20.441379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>24.127500</td>\n",
       "      <td>3.562500</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>21.410000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>50.810000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>21.410000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       total_bill         tip        size     day_avg\n",
       "count  244.000000  244.000000  244.000000  244.000000\n",
       "mean    19.785943    2.998279    2.569672   19.785943\n",
       "std      8.902412    1.383638    0.951100    1.627871\n",
       "min      3.070000    1.000000    1.000000   17.151579\n",
       "25%     13.347500    2.000000    2.000000   17.682742\n",
       "50%     17.795000    2.900000    2.000000   20.441379\n",
       "75%     24.127500    3.562500    3.000000   21.410000\n",
       "max     50.810000   10.000000    6.000000   21.410000"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tips.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "be2dc356",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>x</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>x</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>x</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>y</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>y</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>y</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>y</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b\n",
       "0  x  1\n",
       "1  x  1\n",
       "2  x  2\n",
       "3  y  3\n",
       "4  y  3\n",
       "5  y  4\n",
       "6  y  4"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame({'a':['x']*3+['y']*4,'b':[1,1,2,3,3,4,4]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "ab0b89a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[ True,  True,  True],\n",
       "        [ True,  True,  True],\n",
       "        [ True,  True,  True]]])"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "l = [True]*3\n",
    "r = np.array([[l]*3])\n",
    "r"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "f0a1885c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ True,  True,  True],\n",
       "       [ True,  True,  True],\n",
       "       [ True,  True,  True]])"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.ones((3,3),dtype=bool)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "id": "dad0454d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0  2  4  6  8 10 12 14 16 18]\n",
      "(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64),)\n",
      "[ 0  2  4  6  8 10 12 14 16 18]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1])"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(0,20,2)\n",
    "print(arr)\n",
    "out = np.where(arr%2==1, -1, arr)\n",
    "print(out)\n",
    "print(arr)\n",
    "arr[arr%2==0] = -1\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "e319dae1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 1],\n",
       "       [2, 3],\n",
       "       [4, 5],\n",
       "       [6, 7],\n",
       "       [8, 9]])"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr=np.arange(10)\n",
    "arr.reshape(5,-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "aae1b364",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 1, 1, 1, 1],\n",
       "       [1, 1, 1, 1, 1]])"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "brr = np.repeat(1,10).reshape(2,-1)\n",
    "brr\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "911bedb6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],\n",
       "       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.concatenate((arr.reshape(2,-1),brr)) \n",
    "np.vstack((arr.reshape(2,-1),brr))\n",
    "np.hstack((arr.reshape(2,-1),brr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "d4ad4eac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "int32\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'1.21.5'"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = np.array([1,2,3])\n",
    "print(a.dtype)\n",
    "np.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "7198aa78",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'function' object has no attribute 'values'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_65100\\516591759.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtypename\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m: 'function' object has no attribute 'values'"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "22c15562",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.linspace(0,1,10, endpoint=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "f8bb8bd5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  1.        ,   3.16227766,  10.        ,  31.6227766 ,\n",
       "       100.        ])"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.logspace(0,2,5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "36f9cc7a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3]\n",
      " [4 5 6]]\n",
      "[9 2 3 4 5 6]\n",
      "[[9 2 3]\n",
      " [4 5 6]]\n"
     ]
    }
   ],
   "source": [
    "a = np.array([1,2,3,4,5,6])\n",
    "b = a.reshape(2,3)\n",
    "print(b)\n",
    "a[0] = 9\n",
    "print(a)\n",
    "print(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "id": "211b807f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9, 9, 9, 9, 9, 9, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,\n",
       "       4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6])"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.repeat(6,axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "2fe454a2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 5)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[0, 1, 2, 3, 4],\n",
       "       [0, 1, 2, 3, 4],\n",
       "       [0, 1, 2, 3, 4],\n",
       "       [0, 1, 2, 3, 4],\n",
       "       [0, 1, 2, 3, 4],\n",
       "       [0, 1, 2, 3, 4]])"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = np.arange(0, 5).reshape(1,5)\n",
    "print(b.shape)\n",
    "b.repeat(6,axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "932ac885",
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "_tile_dispatcher() got an unexpected keyword argument 'axis'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_65100\\3010830012.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m6\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<__array_function__ internals>\u001b[0m in \u001b[0;36mtile\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
      "\u001b[1;31mTypeError\u001b[0m: _tile_dispatcher() got an unexpected keyword argument 'axis'"
     ]
    }
   ],
   "source": [
    "np.tile(b,6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "id": "8ea52216",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 5,  6,  7,  8,  9, 10], dtype=int64),)"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a= np.arange(15)\n",
    "b = np.where((a>=5)&(a<=10))\n",
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "a50cade4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 1, 2],\n",
       "       [3, 4, 5],\n",
       "       [6, 7, 8]])"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(9).reshape(3,3)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "id": "40bce429",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 0, 2],\n",
       "       [4, 3, 5],\n",
       "       [7, 6, 8]])"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "brr = arr[:,[1,0,2]]\n",
    "brr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "id": "2433da8d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[7.41240598 7.22915675 5.68978972]\n",
      " [7.94560746 6.17700188 6.02044164]\n",
      " [9.15400216 6.10641323 8.72827672]\n",
      " [5.74361766 5.02839807 5.18212757]\n",
      " [8.56802713 8.15362109 9.6297368 ]]\n",
      "[[7.412 7.229 5.69 ]\n",
      " [7.946 6.177 6.02 ]\n",
      " [9.154 6.106 8.728]\n",
      " [5.744 5.028 5.182]\n",
      " [8.568 8.154 9.63 ]]\n"
     ]
    }
   ],
   "source": [
    "# 创建一个5×3的二维数组，以包含5到10之间的随机浮点数。\n",
    "a = np.random.randint(low=5,high=10,size=(5,3))+ np.random.random(size=(5,3))\n",
    "print(a)\n",
    "np.set_printoptions(precision=3,suppress=True)\n",
    "print(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "5dd9dcfa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[5.1 3.5 1.4 0.2]\n",
      " [4.9 3.  1.4 0.2]\n",
      " [4.7 3.2 1.3 0.2]\n",
      " [4.6 3.1 1.5 0.2]\n",
      " [5.  3.6 1.4 0.2]\n",
      " [5.4 3.9 1.7 0.4]\n",
      " [4.6 3.4 1.4 0.3]\n",
      " [5.  3.4 1.5 0.2]\n",
      " [4.4 2.9 1.4 0.2]\n",
      " [4.9 3.1 1.5 0.1]\n",
      " [5.4 3.7 1.5 0.2]\n",
      " [4.8 3.4 1.6 0.2]\n",
      " [4.8 3.  1.4 0.1]\n",
      " [4.3 3.  1.1 0.1]\n",
      " [5.8 4.  1.2 0.2]\n",
      " [5.7 4.4 1.5 0.4]\n",
      " [5.4 3.9 1.3 0.4]\n",
      " [5.1 3.5 1.4 0.3]\n",
      " [5.7 3.8 1.7 0.3]\n",
      " [5.1 3.8 1.5 0.3]\n",
      " [5.4 3.4 1.7 0.2]\n",
      " [5.1 3.7 1.5 0.4]\n",
      " [4.6 3.6 1.  0.2]\n",
      " [5.1 3.3 1.7 0.5]\n",
      " [4.8 3.4 1.9 0.2]\n",
      " [5.  3.  1.6 0.2]\n",
      " [5.  3.4 1.6 0.4]\n",
      " [5.2 3.5 1.5 0.2]\n",
      " [5.2 3.4 1.4 0.2]\n",
      " [4.7 3.2 1.6 0.2]\n",
      " [4.8 3.1 1.6 0.2]\n",
      " [5.4 3.4 1.5 0.4]\n",
      " [5.2 4.1 1.5 0.1]\n",
      " [5.5 4.2 1.4 0.2]\n",
      " [4.9 3.1 1.5 0.2]\n",
      " [5.  3.2 1.2 0.2]\n",
      " [5.5 3.5 1.3 0.2]\n",
      " [4.9 3.6 1.4 0.1]\n",
      " [4.4 3.  1.3 0.2]\n",
      " [5.1 3.4 1.5 0.2]\n",
      " [5.  3.5 1.3 0.3]\n",
      " [4.5 2.3 1.3 0.3]\n",
      " [4.4 3.2 1.3 0.2]\n",
      " [5.  3.5 1.6 0.6]\n",
      " [5.1 3.8 1.9 0.4]\n",
      " [4.8 3.  1.4 0.3]\n",
      " [5.1 3.8 1.6 0.2]\n",
      " [4.6 3.2 1.4 0.2]\n",
      " [5.3 3.7 1.5 0.2]\n",
      " [5.  3.3 1.4 0.2]\n",
      " [7.  3.2 4.7 1.4]\n",
      " [6.4 3.2 4.5 1.5]\n",
      " [6.9 3.1 4.9 1.5]\n",
      " [5.5 2.3 4.  1.3]\n",
      " [6.5 2.8 4.6 1.5]\n",
      " [5.7 2.8 4.5 1.3]\n",
      " [6.3 3.3 4.7 1.6]\n",
      " [4.9 2.4 3.3 1. ]\n",
      " [6.6 2.9 4.6 1.3]\n",
      " [5.2 2.7 3.9 1.4]\n",
      " [5.  2.  3.5 1. ]\n",
      " [5.9 3.  4.2 1.5]\n",
      " [6.  2.2 4.  1. ]\n",
      " [6.1 2.9 4.7 1.4]\n",
      " [5.6 2.9 3.6 1.3]\n",
      " [6.7 3.1 4.4 1.4]\n",
      " [5.6 3.  4.5 1.5]\n",
      " [5.8 2.7 4.1 1. ]\n",
      " [6.2 2.2 4.5 1.5]\n",
      " [5.6 2.5 3.9 1.1]\n",
      " [5.9 3.2 4.8 1.8]\n",
      " [6.1 2.8 4.  1.3]\n",
      " [6.3 2.5 4.9 1.5]\n",
      " [6.1 2.8 4.7 1.2]\n",
      " [6.4 2.9 4.3 1.3]\n",
      " [6.6 3.  4.4 1.4]\n",
      " [6.8 2.8 4.8 1.4]\n",
      " [6.7 3.  5.  1.7]\n",
      " [6.  2.9 4.5 1.5]\n",
      " [5.7 2.6 3.5 1. ]\n",
      " [5.5 2.4 3.8 1.1]\n",
      " [5.5 2.4 3.7 1. ]\n",
      " [5.8 2.7 3.9 1.2]\n",
      " [6.  2.7 5.1 1.6]\n",
      " [5.4 3.  4.5 1.5]\n",
      " [6.  3.4 4.5 1.6]\n",
      " [6.7 3.1 4.7 1.5]\n",
      " [6.3 2.3 4.4 1.3]\n",
      " [5.6 3.  4.1 1.3]\n",
      " [5.5 2.5 4.  1.3]\n",
      " [5.5 2.6 4.4 1.2]\n",
      " [6.1 3.  4.6 1.4]\n",
      " [5.8 2.6 4.  1.2]\n",
      " [5.  2.3 3.3 1. ]\n",
      " [5.6 2.7 4.2 1.3]\n",
      " [5.7 3.  4.2 1.2]\n",
      " [5.7 2.9 4.2 1.3]\n",
      " [6.2 2.9 4.3 1.3]\n",
      " [5.1 2.5 3.  1.1]\n",
      " [5.7 2.8 4.1 1.3]\n",
      " [6.3 3.3 6.  2.5]\n",
      " [5.8 2.7 5.1 1.9]\n",
      " [7.1 3.  5.9 2.1]\n",
      " [6.3 2.9 5.6 1.8]\n",
      " [6.5 3.  5.8 2.2]\n",
      " [7.6 3.  6.6 2.1]\n",
      " [4.9 2.5 4.5 1.7]\n",
      " [7.3 2.9 6.3 1.8]\n",
      " [6.7 2.5 5.8 1.8]\n",
      " [7.2 3.6 6.1 2.5]\n",
      " [6.5 3.2 5.1 2. ]\n",
      " [6.4 2.7 5.3 1.9]\n",
      " [6.8 3.  5.5 2.1]\n",
      " [5.7 2.5 5.  2. ]\n",
      " [5.8 2.8 5.1 2.4]\n",
      " [6.4 3.2 5.3 2.3]\n",
      " [6.5 3.  5.5 1.8]\n",
      " [7.7 3.8 6.7 2.2]\n",
      " [7.7 2.6 6.9 2.3]\n",
      " [6.  2.2 5.  1.5]\n",
      " [6.9 3.2 5.7 2.3]\n",
      " [5.6 2.8 4.9 2. ]\n",
      " [7.7 2.8 6.7 2. ]\n",
      " [6.3 2.7 4.9 1.8]\n",
      " [6.7 3.3 5.7 2.1]\n",
      " [7.2 3.2 6.  1.8]\n",
      " [6.2 2.8 4.8 1.8]\n",
      " [6.1 3.  4.9 1.8]\n",
      " [6.4 2.8 5.6 2.1]\n",
      " [7.2 3.  5.8 1.6]\n",
      " [7.4 2.8 6.1 1.9]\n",
      " [7.9 3.8 6.4 2. ]\n",
      " [6.4 2.8 5.6 2.2]\n",
      " [6.3 2.8 5.1 1.5]\n",
      " [6.1 2.6 5.6 1.4]\n",
      " [7.7 3.  6.1 2.3]\n",
      " [6.3 3.4 5.6 2.4]\n",
      " [6.4 3.1 5.5 1.8]\n",
      " [6.  3.  4.8 1.8]\n",
      " [6.9 3.1 5.4 2.1]\n",
      " [6.7 3.1 5.6 2.4]\n",
      " [6.9 3.1 5.1 2.3]\n",
      " [5.8 2.7 5.1 1.9]\n",
      " [6.8 3.2 5.9 2.3]\n",
      " [6.7 3.3 5.7 2.5]\n",
      " [6.7 3.  5.2 2.3]\n",
      " [6.3 2.5 5.  1.9]\n",
      " [6.5 3.  5.2 2. ]\n",
      " [6.2 3.4 5.4 2.3]\n",
      " [5.9 3.  5.1 1.8]]\n",
      "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
      " 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
      " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n",
      " 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n",
      " 2 2]\n",
      "['setosa' 'versicolor' 'virginica']\n"
     ]
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "iris = datasets.load_iris()\n",
    "print(iris.data)\n",
    "print(iris.target)\n",
    "print(iris.target_names)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f12febca",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
